47th Australian Parliament basic statistics¶

Note you will have needed to run the download.sh script to get the data.

In [1]:
import os
import pathlib

# set plotly express to white theme
from dotenv import load_dotenv
from sqlalchemy import create_engine
import matplotlib.pyplot as plt

%matplotlib inline

load_dotenv("../.env")

USE_POSTGRES = os.environ.get('DATABASE_USERNAME',False)
# USE_POSTGRES = False

root_data_dir = pathlib.Path("..").resolve() / "data"
ext_data_dir = root_data_dir / "external"
geopackage = root_data_dir / "aped.gpkg"
if USE_POSTGRES:
  engine = create_engine(
      f"postgresql+psycopg://{os.environ.get('DATABASE_USERNAME')}:{os.environ.get('DATABASE_PASSWORD')}@localhost:5432/{os.environ.get('DATABASE_NAME')}")
else:
  import sqlite3
  engine = sqlite3.connect(geopackage)
In [2]:
import pandas as pd
members = pd.read_sql("SELECT * from member_aph_47", engine)
members
Out[2]:
id member_id member party party_abbrev district is_senator is_representative mp_id start ... RepresentedMinistries RepresentedShadowMinistries ParliamentaryPositions Honours Occupations SecondaryOccupations Qualifications ElectorateService PartyParliamentaryService PartyCommitteeService
0 1 128 Eric Abetz Coalition LNP Tasmania True False N26 1994-02-22 ... [Parliamentary Secretary, Special Minister of ... [Shadow Minister, Shadow Minister, Shadow Mini... [] {} [Barrister and solicitor] [Barrister, Legal, Social and Welfare Professi... [Bachelor of Arts, University of Tasmania, Bac... [] [{'RoSId': 26985, 'RoSType': 'Parliamentary Se... [{'RoSId': 943, 'RoSType': 'Committee Service'...
1 2 182 Mary Doyle Australian Labor Party ALP Aston False True 299962 2023-04-01 ... [] [] [] {} [] [] [] [] [] []
2 3 54 Anthony Albanese Australian Labor Party ALP Grayndler False True R36 1996-03-02 ... [Cabinet Minister, Cabinet Minister, Cabinet M... [Shadow Parliamentary Secretary, Shadow Parlia... [] {} [Senior Policy Adviser to the Premier, the Hon... [Credit or Loans Officer, Numerical Clerks, Cl... [Bachelor of Economics, University of Sydney] [{'Electorate': 'Grayndler', 'State': 'New Sou... [{'RoSId': 27149, 'RoSType': 'Parliamentary Se... [{'RoSId': 1127, 'RoSType': 'Committee Service...
3 4 187 John Alexander Coalition LNP Bennelong False True M3M 2010-08-21 ... [] [] [] {"Australian Sports Medal, 2000","Awarded a Me... [Federation Cup Captain/Coach, 2005., Sports c... [Tennis Coach, Sportspersons nec, Chief Execut... [] [{'Electorate': 'Bennelong', 'State': 'New Sou... [{'RoSId': 27156, 'RoSType': 'Parliamentary Se... [{'RoSId': 1133, 'RoSType': 'Committee Service...
4 5 52 Katie Allen Coalition LNP Higgins False True 282986 2019-05-18 ... [] [] [] {"Fellow of the Australian Academy of Health a... [Scientific Advisory Board Member at Before Br... [Life Scientists nec, Research and Development... [Doctor of Philosophy, University of Melbourne... [{'Electorate': 'Higgins', 'State': 'Victoria'... [{'RoSId': 54212, 'RoSType': 'Parliamentary Se... [{'RoSId': 54504, 'RoSType': 'Committee Servic...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
278 279 205 Jason Wood Coalition LNP La Trobe False True E0F 2013-09-07 ... [Assistant Minister] [Shadow Parliamentary Secretary, Shadow Parlia... [] {"Awarded a Centenary Medal, 01.01.2001","Quee... [Senior Sergeant in the Counter Terrorism Coor... [Police Officer, Detective, Protective Service... [Graduate Diploma Innovation Service Managemen... [{'Electorate': 'La Trobe', 'State': 'Victoria... [{'RoSId': 27705, 'RoSType': 'Parliamentary Se... [{'RoSId': 2472, 'RoSType': 'Committee Service...
279 280 265 Ken Wyatt Coalition LNP Hasluck False True M3A 2010-08-21 ... [Assistant Minister, Assistant Minister, Assis... [] [] {"Indigenous Education Warrior Award, World In... [Director for Aboriginal Health at the Departm... [Management Consultant, Policy and Planning Ma... [Bachelor of Education, Churchlands College of... [{'Electorate': 'Hasluck', 'State': 'Western A... [{'RoSId': 27710, 'RoSType': 'Parliamentary Se... [{'RoSId': 2479, 'RoSType': 'Committee Service...
280 281 242 Terry Young Coalition LNP Longman False True 201906 2019-05-18 ... [] [] [Member of the Speaker's Panel] {} [Franchisee, Drummond Golf, Maroochydore from ... [Other Factory Process Workers nfd, Service St... [] [{'Electorate': 'Longman', 'State': 'Queenslan... [{'RoSId': 54008, 'RoSType': 'Parliamentary Se... [{'RoSId': 54534, 'RoSType': 'Committee Servic...
281 282 230 Tony Zappia Australian Labor Party ALP Makin False True HWB 2007-11-24 ... [] [Shadow Parliamentary Secretary, Shadow Parlia... [] {} [Fitness centre manager from 1981 to 2007., Re... [Bank Worker, Fitness Centre Manager, Numerica... [] [{'Electorate': 'Makin', 'State': 'South Austr... [{'RoSId': 27719, 'RoSType': 'Parliamentary Se... [{'RoSId': 2501, 'RoSType': 'Committee Service...
282 283 63 Trent Zimmerman Coalition LNP North Sydney False True 203092 2015-12-05 ... [] [] [Member of the Speaker's Panel] {} [Deputy Chief Executive Officer and Director o... [Information and Organisation Professionals ne... [] [{'Electorate': 'North Sydney', 'State': 'New ... [{'RoSId': 27723, 'RoSType': 'Parliamentary Se... [{'RoSId': 2508, 'RoSType': 'Committee Service...

283 rows × 62 columns

In [3]:
party_counts = members.groupby(["party", "Gender"]).size().reset_index(name='count')
party_counts["%"] = (party_counts["count"] / party_counts.groupby("party")["count"].transform("sum")) * 100
party_counts
Out[3]:
party Gender count %
0 Australian Greens Female 10 55.555556
1 Australian Greens Male 8 44.444444
2 Australian Labor Party Female 60 51.282051
3 Australian Labor Party Male 57 48.717949
4 Centre Alliance Female 1 50.000000
5 Centre Alliance Male 1 50.000000
6 Coalition Female 33 26.400000
7 Coalition Male 92 73.600000
8 Independent Female 9 69.230769
9 Independent Male 4 30.769231
10 Jacqui Lambie Network Female 2 100.000000
11 Katter's Australian Party Male 1 100.000000
12 Liberal Democratic Party Female 1 100.000000
13 Pauline Hanson's One Nation Female 1 50.000000
14 Pauline Hanson's One Nation Male 1 50.000000
15 United Australia Party Male 1 100.000000
16 United Australia Party [2018] Male 1 100.000000
In [4]:
# create a cut of members by age
from datetime import date

today = date.today()
members["age"] = pd.to_datetime(members["dob"]).apply(
    lambda x: today.year - x.year - ((today.month, today.day) < (x.month, x.day)))
members["age_group"] = pd.cut(members["age"], bins=[18, 21, 30, 40, 50, 60, 70, 80, 90, 159],
                              labels=["18-20", "21-30", "31-40", "41-50", "51-60", "61-70", "71-80", "81-90", "91+"],
                              ordered=False)
age_counts = members.groupby(["Gender", "chamber", "age_group"]).size().reset_index(name='count')
age_counts
# create a plot of members by age_counts
import plotly.express as px

px.bar(age_counts, x="age_group", y="count", color="Gender", barmode="group", facet_col="chamber",
       title="Age breakdown of Australian politicians by chamber")
In [5]:
# two female memebers missing 2 male members 4 male senators
chamber_counts = members.groupby(["Gender", "chamber"]).size().reset_index(name='count')
px.bar(age_counts, x="chamber", y="count", color="Gender", barmode="group", title="Gender breakdown by chamber")
In [6]:
# calculate members age using their date of birth grouped by party
from datetime import date

today = date.today()
members["age"] = pd.to_datetime(members["dob"]).apply(
    lambda x: today.year - x.year - ((today.month, today.day) < (x.month, x.day)))
members.groupby(["party"]).agg({"age": ["mean", "std"]})
Out[6]:
age
mean std
party
Australian Greens 50.333333 11.812257
Australian Labor Party 52.743590 9.897016
Centre Alliance 57.500000 10.606602
Coalition 52.983871 8.529227
Independent 51.692308 6.848170
Jacqui Lambie Network 52.000000 0.000000
Katter's Australian Party 77.000000 NaN
Liberal Democratic Party 55.000000 NaN
Pauline Hanson's One Nation 67.500000 0.707107
United Australia Party 39.000000 NaN
United Australia Party [2018] 59.000000 NaN
In [7]:
import plotly.express as px

fig = px.bar(party_counts, x="party", y="%", color="Gender",
             title="Gender breakdown of Australian politicians by party")
fig.show()
In [8]:
members_education = pd.read_sql("""
SELECT * from member_secondary_school_education_47
""", engine)
In [9]:
members_education["al_school_sector"].value_counts()
Out[9]:
al_school_sector
Government     145
Independent     97
Catholic        67
Name: count, dtype: int64
In [10]:
school_sector_counts = members_education.drop_duplicates(["member", "school_sector"]).groupby(
    ["party", "school_sector"]).size().reset_index(name='count')
school_sector_counts
Out[10]:
party school_sector count
0 Australian Greens Non-government 6
1 Australian Greens Public 12
2 Australian Labor Party Non-government 60
3 Australian Labor Party Public 52
4 Centre Alliance Non-government 2
5 Centre Alliance Public 1
6 Coalition Non-government 67
7 Coalition Public 60
8 Independent Non-government 9
9 Independent Public 5
10 Jacqui Lambie Network Non-government 1
11 Jacqui Lambie Network Public 1
12 Liberal Democratic Party Non-government 1
13 Pauline Hanson's One Nation Non-government 1
14 Pauline Hanson's One Nation Public 1
15 United Australia Party Non-government 1
16 United Australia Party [2018] Non-government 1
In [11]:
# create a plot of members school sector type by party

school_sector_counts = members_education.drop_duplicates(["member", "school_sector"]).groupby(
    ["school_sector", "party", ]).size().reset_index(name='count')
fig = px.bar(school_sector_counts, x="party", y="count", color="school_sector", barmode="group",
             title="School sector breakdown of Australian politicians by party")
fig.show()

members

In [12]:
school_sector_counts = members_education.drop_duplicates(["member", "school_sector"]).groupby(
    ["party", "school_sector"]).size().reset_index(name='count')
fig = px.bar(school_sector_counts, x="school_sector", y="count", color="party",
             title="School sector breakdown of Australian politicians by party")
fig.show()

Educational school sector split of politicians¶

Public is Government 64.4605631350012 Catholic 19.6750931104224 Independent 15.8643437545764

47th Parliament is Government 47% Catholic 20% Independent 33%

In [13]:
population_school_sector = {
    "Catholic": 19.7,
    "Government": 64.5,
    "Independent": 15.8643437545764}
pop_school_sector = pd.DataFrame.from_dict(population_school_sector, orient="index").reset_index()
pop_school_sector["group"] = "Australian Population"
pop_school_sector.columns = ["school_sector", "%", "group"]
pop_school_sector
Out[13]:
school_sector % group
0 Catholic 19.700000 Australian Population
1 Government 64.500000 Australian Population
2 Independent 15.864344 Australian Population
In [14]:
# create a plot of members as a percentage school sector

school_sector_counts = members_education.drop_duplicates(["member", "al_school_sector"]).groupby(
    ["al_school_sector"]).size().reset_index(name='count')
school_sector_counts.rename(columns={"al_school_sector": "school_sector"}, inplace=True)
school_sector_counts["%"] = (school_sector_counts["count"] / school_sector_counts["count"].sum()) * 100
school_sector_counts["group"] = "Australian Politicians"

fig = px.bar(pd.concat([school_sector_counts, pop_school_sector]), x="school_sector", y="%",
             color="group",
             barmode="group", title="School sector breakdown of Australian politicians")
fig.show()
In [17]:
# create a plot of members as a percentage school sector depending on if is_representative or senate
school_sector_counts = members_education.drop_duplicates(["member", "al_school_sector"]).groupby(
    ["chamber", "al_school_sector"]).size().reset_index(name='count')
school_sector_counts["%"] = (school_sector_counts["count"] / school_sector_counts.groupby("chamber")[
    "count"].transform("sum")) * 100
fig = px.bar(school_sector_counts, x="chamber", y="%", color="al_school_sector",
             title="School sector breakdown of Australian politicians by party", labels={"chamber": "Chamber"})
fig.show()
In [18]:
# lets extract out our acara_financial data and see which school sectors are in our data
import geopandas as gpd

load_dotenv("../.env")

if USE_POSTGRES:
  gdf = gpd.read_postgis(
      'SELECT m.*, "total enrolments" as total_students FROM member_secondary_school_education_47 m LEFT JOIN acara_school_profile_2022 a on m.acara_id = a."acara sml id"::int',
      engine, geom_col='geom')
else:
  gdf_47 = gpd.read_file(geopackage, layer="member_secondary_school_education_47")
  gdf_47 = gdf_47.convert_dtypes()
  asp_47 = gpd.read_file(geopackage, layer="acara_school_profile_2022")[["acara sml id", "total enrolments"]]
  asp_47 = asp_47.convert_dtypes()
  gdf = pd.merge(gdf_47, asp_47, left_on="acara_id", right_on="acara sml id", how="left")
  gdf.rename(columns={"total enrolments": "total_students"}, inplace=True)
gdf = gdf.convert_dtypes()
# fill missing students with min 50 students
gdf["total_students"] = gdf["total_students"].fillna('50').apply(lambda x: '50' if not x else x)
gdf["total_students"] = gdf["total_students"].astype(int)

# Group by party_abbrev and school_sector and avg total government funding
gdf["australian_government_recurrent_funding_per_student"] = gdf[
    "australian_government_recurrent_funding_per_student"].fillna(0).astype(int)
gdf["state__territory_government_recurring_funding_per_student"] = gdf[
    "state__territory_government_recurring_funding_per_student"].fillna(0).astype(int)
gdf["other_private_sources_per_student"] = gdf["other_private_sources_per_student"].fillna(0).astype(int)
gdf["total_gross_income_per_student"] = gdf["total_gross_income_per_student"].fillna(0).astype(int)
In [19]:
grouped = gdf.groupby(["party_abbrv", "school_sector"]).agg(
    {"other_private_sources_per_student": "mean", "total_gross_income_per_student": "mean"}).reset_index()
px.bar(grouped, x="party_abbrv", y="other_private_sources_per_student", color="school_sector", barmode="group")
In [20]:
px.bar(grouped, x="party_abbrv", y="total_gross_income_per_student", color="school_sector", barmode="group")
In [21]:
# Select most common secondary school from members_secondary_school APH data
most_common_schools = pd.read_sql(
    "SELECT secondaryschool,count(*) as members_attending  FROM members_secondary_school WHERE secondaryschool != '' and secondaryschool is not null GROUP BY secondaryschool ORDER BY count(*) desc limit 10",
    engine)
most_common_schools
Out[21]:
secondaryschool members_attending
0 Scotch College 5
1 St Ignatius College 4
2 St Peter's College 4
3 Wesley College 4
4 Marist College 3
5 Geelong Grammar School 3
6 Norwood High School 2
7 Aquinas College 2
8 Christian Brothers' College 2
9 Scotch College Melbourne 2

Members attending same school¶

In [22]:
# select those who attended the same school get value counts of name and show members who attended the same school
school_counts = members_education["name"].value_counts()
pd.merge(members_education, school_counts[school_counts > 1].reset_index(), left_on="name", right_on="name")[
    ["name", "member", "party", "school_sector", "count"]].sort_values(["name", "member"])
Out[22]:
name member party school_sector count
78 Alfred Deakin High School Andrew Laming Coalition Public 2
77 Alfred Deakin High School Phillip Thompson Coalition Public 2
47 Anglican Church Grammar School Glenn Sterle Australian Labor Party Non-government 2
46 Anglican Church Grammar School Matt Thistlethwaite Australian Labor Party Non-government 2
29 Aquinas College Celia Hammond Coalition Non-government 2
... ... ... ... ... ...
33 Wesley College, Victoria Shayne Neumann Australian Labor Party Non-government 2
8 Woonona High School Karen Grogan Australian Labor Party Public 2
9 Woonona High School Mary Doyle Australian Labor Party Public 2
20 Xavier College David Pocock Independent Non-government 2
21 Xavier College Katie Allen Coalition Non-government 2

83 rows × 5 columns

In [23]:
# select members who don't have a high school in education data
pd.read_sql(
    "SELECT * FROM members WHERE id not in (SELECT member_id FROM member_education JOIN education e on member_education.education_id = e.id WHERE e.is_high_school = TRUE ) AND (high_school is null or high_school != 'International' ) ",
    engine)
Out[23]:
id orig_id orig_table member district is_senator is_representative graduated start wiki_link dob mp_id party_id chamber high_school preferred_name aph_name
0 86 3 47 Alex Antic South Australia True False True 2019-07-01 http://www.wikidata.org/entity/Q63520981 1974-12-22 269375 42 senate Public Alex Antic Alexander Antic
1 122 12 46 Vince Connelly Stirling False True True 2019-05-18 http://www.wikidata.org/entity/Q64585316 1978-09-16 282984 42 house Non-government Vince Connelly Vincent Connelly
2 90 106 47 Karen Grogan South Australia True False True 2021-09-21 http://www.wikidata.org/entity/Q108617920 1960-01-01 296331 11 senate None None Karen Grogan
3 243 21 46 Nicolle Flint Boothby False True True 2016-07-02 http://www.wikidata.org/entity/Q25756083 1978-07-15 245550 42 house Non-government None Nicolle Flint
4 75 151 47 Michelle Ananda-Rajah Higgins False True True 2022-05-21 http://www.wikidata.org/entity/Q90398588 1972-12-10 290544 11 house None None Michelle Ananda-Rajah
5 142 188 47 Sam Birrell Nicholls False True True 2022-05-21 http://www.wikidata.org/entity/Q112131083 1975-01-01 288713 42 house None None Sam Birrell
6 99 155 47 Milton Dick Oxley False True True 2016-07-02 http://www.wikidata.org/entity/Q20983429 1972-07-21 53517 11 house Non-government Milton Dick Dugald Dick
7 93 173 47 Peter Khalil Wills False True True 2016-07-02 http://www.wikidata.org/entity/Q25756187 1973-03-23 101351 11 house Non-government None Peter Khalil
8 251 182 47 Rob Mitchell McEwen False True True 2010-08-21 http://www.wikidata.org/entity/Q7340383 1967-09-09 M3E 11 house Public Rob Mitchell Robert Mitchell
9 152 123 47 Llew O'Brien Wide Bay False True False 2016-07-02 http://www.wikidata.org/entity/Q25756234 1972-06-26 265991 42 house None Llew O'Brien Llewellyn O'Brien
10 57 75 47 Graham Perrett Moreton False True True 2007-11-24 http://www.wikidata.org/entity/Q5593142 1966-01-05 HVP 11 house Public None Graham Perrett
11 23 200 47 Stephen Bates Brisbane False True True 2022-05-21 http://www.wikidata.org/entity/Q112150148 1992-11-23 300246 12 house None None Stephen Bates
12 175 219 47 Tracey Roberts Pearce False True True 2022-05-21 http://www.wikidata.org/entity/Q109850050 1960-01-01 157125 11 house None None Tracey Roberts
13 103 48 46 Julian Simmonds Ryan False True True 2019-05-18 http://www.wikidata.org/entity/Q64226194 1985-08-29 282983 42 house Non-government None Julian Simmonds
14 29 49 46 Arthur Sinodinos New South Wales True False True 2011-10-13 http://www.wikidata.org/entity/Q4800293 1957-02-25 BV7 42 senate Public None Arthur Sinodinos
15 160 132 47 Marielle Smith South Australia True False True 2019-07-01 http://www.wikidata.org/entity/Q63531157 1986-12-30 281603 11 senate Both None Marielle Smith
16 207 98 47 Jordon Steele-John Western Australia True False True 2017-11-10 http://www.wikidata.org/entity/Q33139188 1994-10-14 250156 12 senate Home-Schooled None Jordon Steele-John
17 13 86 47 Jana Stewart Victoria True False True 2022-04-06 http://www.wikidata.org/entity/Q111513180 1987-01-01 299352 11 senate None None Jana Stewart
18 222 206 47 Susan Templeman Macquarie False True True 2016-07-02 http://www.wikidata.org/entity/Q25753992 1963-07-30 181810 11 house Public None Susan Templeman
19 67 94 47 Jess Walsh Victoria True False True 2019-07-01 http://www.wikidata.org/entity/Q64711059 1971-05-16 252157 11 senate Non-government None Jess Walsh
20 82 174 47 Peter Whish-Wilson Tasmania True False True 2012-06-20 http://www.wikidata.org/entity/Q7177677 1968-02-24 195565 12 senate Both None Peter Whish-Wilson
In [24]:
# Do percentage change in public to non-goverment school from parliamnent 46 to 47
ph_46 = pd.read_sql("SELECT * FROM member_secondary_school_education_46", engine)
ph_47 = pd.read_sql("SELECT * FROM member_secondary_school_education_47", engine)
In [25]:
school_sector_counts_46 = ph_46.drop_duplicates(["member", "school_sector"]).groupby(
    ["party", "school_sector"]).size().reset_index(name='count')
school_sector_counts_46["%"] = (school_sector_counts_46["count"] / school_sector_counts_46.groupby("party")[
    "count"].transform("sum")) * 100
school_sector_counts_46["parliament"] = "46th Parliament"

school_sector_counts_47 = ph_47.drop_duplicates(["member", "school_sector"]).groupby(
    ["party", "school_sector"]).size().reset_index(name='count')
school_sector_counts_47["%"] = (school_sector_counts_47["count"] / school_sector_counts_47.groupby("party")[
    "count"].transform("sum")) * 100
school_sector_counts_46["parliament"] = "46th Parliament"
school_sector_counts_47["parliament"] = "47th Parliament"
# calculate differences between school_sector_counts_46 and school_sector_counts
pd.concat([school_sector_counts_46, school_sector_counts_47]).sort_values(["party", "school_sector"])
Out[25]:
party school_sector count % parliament
0 Australian Greens Non-government 9 75.000000 46th Parliament
0 Australian Greens Non-government 6 33.333333 47th Parliament
1 Australian Greens Public 3 25.000000 46th Parliament
1 Australian Greens Public 12 66.666667 47th Parliament
2 Australian Labor Party Non-government 48 49.484536 46th Parliament
2 Australian Labor Party Non-government 60 53.571429 47th Parliament
3 Australian Labor Party Public 49 50.515464 46th Parliament
3 Australian Labor Party Public 52 46.428571 47th Parliament
4 Centre Alliance Non-government 1 50.000000 46th Parliament
4 Centre Alliance Non-government 2 66.666667 47th Parliament
5 Centre Alliance Public 1 50.000000 46th Parliament
5 Centre Alliance Public 1 33.333333 47th Parliament
6 Coalition Non-government 63 56.250000 46th Parliament
6 Coalition Non-government 67 52.755906 47th Parliament
7 Coalition Public 49 43.750000 46th Parliament
7 Coalition Public 60 47.244094 47th Parliament
8 Independent Non-government 4 66.666667 46th Parliament
8 Independent Non-government 9 64.285714 47th Parliament
9 Independent Public 2 33.333333 46th Parliament
9 Independent Public 5 35.714286 47th Parliament
10 Jacqui Lambie Network Non-government 1 50.000000 47th Parliament
10 Jacqui Lambie Network Public 1 100.000000 46th Parliament
11 Jacqui Lambie Network Public 1 50.000000 47th Parliament
11 Katter's Australian Party Public 1 100.000000 46th Parliament
12 Liberal Democratic Party Non-government 1 100.000000 46th Parliament
12 Liberal Democratic Party Non-government 1 100.000000 47th Parliament
13 Pauline Hanson's One Nation Non-government 1 50.000000 47th Parliament
13 Pauline Hanson's One Nation Public 1 100.000000 46th Parliament
14 Pauline Hanson's One Nation Public 1 50.000000 47th Parliament
15 United Australia Party Non-government 1 100.000000 47th Parliament
16 United Australia Party [2018] Non-government 1 100.000000 47th Parliament
In [26]:
school_sector_counts_prev = ph_46.drop_duplicates(["member", "al_school_sector"]).groupby(
    ["al_school_sector"]).size().reset_index(name='count')
school_sector_counts_prev.rename(columns={"al_school_sector": "school_sector"}, inplace=True)
school_sector_counts_prev["%"] = (school_sector_counts_prev["count"] / school_sector_counts_prev["count"].sum()) * 100
school_sector_counts_prev["group"] = "Australian Politicians 46th Parliament"

school_sector_counts = ph_47.drop_duplicates(["member", "al_school_sector"]).groupby(
    ["al_school_sector"]).size().reset_index(name='count')
school_sector_counts.rename(columns={"al_school_sector": "school_sector"}, inplace=True)
school_sector_counts["%"] = (school_sector_counts["count"] / school_sector_counts["count"].sum()) * 100
school_sector_counts["group"] = "Australian Politicians 47th Parliament"

fig = px.bar(pd.concat([school_sector_counts, school_sector_counts_prev, pop_school_sector]), x="school_sector", y="%",
             color="group",
             barmode="group", title="School sector breakdown of Australian politicians past 2 parliaments")
fig.show()
In [27]:
from pywaffle import Waffle
import matplotlib.pyplot as plt
# slightly different to SMH as they set those who did not graduate or who attended overseas to other
members_46 = pd.read_sql("SELECT * FROM member_aph_46", engine)
members_46_high_school = members_46.groupby("high_school").size().reset_index(name='count')
members_46_high_school["%"] = (members_46_high_school["count"] / members_46_high_school["count"].sum()) * 100
members_46_high_school
Out[27]:
high_school count %
0 Both 17 7.327586
1 Home-Schooled 1 0.431034
2 International 2 0.862069
3 Non-government 114 49.137931
4 Public 98 42.241379